import jieba
from wordcloud import WordCloud
from collections import Counter
import matplotlib.pyplot as plt
from PIL import Image
from jieba import posseg

# Build the stopword set (one entry per line in the file).
# A set gives O(1) membership tests in the tokenizing loop below,
# versus O(n) per token with a list.
stop_word = set()
with open(r'C:\Users\User\Desktop\code\pycode\lab2\baidu_stopwords.txt', encoding="utf-8") as fs:
    for line1 in fs:  # iterate the file lazily instead of readlines()
        word = line1.strip()  # strip() also drops '\r' left by CRLF files, unlike strip('\n')
        if word:  # skip blank lines so '' never becomes a stopword
            stop_word.add(word)
# Tokenize every post and keep the tokens that are not stopwords.
text_final = []
with open(r"C:\Users\User\Desktop\code\pycode\lab2\weibo.txt", "r", encoding="utf-8") as f:
    # Stream line by line instead of readlines() — the file can be large.
    for line in f:
        # Columns are tab-separated; the post text is in column 2.
        fields = line.split('\t')
        if len(fields) < 2:
            # Guard: skip malformed lines instead of raising IndexError.
            continue
        for token in jieba.cut(fields[1]):
            token = token.strip()
            # Drop whitespace-only tokens (e.g. '\n') as well as stopwords,
            # otherwise they dominate the frequency counts.
            if token and token not in stop_word:
                text_final.append(token)

# Word -> frequency mapping fed to WordCloud.fit_words below.
tfreq = Counter(text_final)

# Configure the WordCloud renderer.
# NOTE: the original passed stopwords="stopwords" — a literal string, not a
# collection of words, which is a bug. It is dropped here: fit_words() (used
# below) ignores the stopwords argument anyway, and stopwords were already
# filtered out when building `tfreq`.
wc = WordCloud(
    width=200,                   # canvas width in px
    height=150,                  # canvas height in px
    background_color='white',    # white background
    max_font_size=100,           # no word is drawn larger than 100px
    min_font_size=10,            # no word is drawn smaller than 10px
    max_words=100,               # render at most 100 words
    scale=2,                     # upscale the rendered image 2x
    font_path="C:\\Users\\User\\Desktop\\STXINGKA.TTF",  # CJK-capable font, required for Chinese text
    repeat=False,                # do not repeat words to fill empty space
)

# Lay out the cloud from the precomputed frequencies, save it as PNG,
# then display it.
wc.fit_words(tfreq)
wc.to_file(r"C:\Users\User\Desktop\code\pycode\lab2\weibo.png")
# Show the in-memory image directly instead of re-reading the PNG we
# just wrote from disk (the original used Image.open on the saved file).
wc.to_image().show()